# Clear the workspace before the run.
# NOTE(review): `rm(list = ls())` in scripts is discouraged — prefer
# restarting R for a truly clean session.
rm(list = ls(all.names = TRUE))

# Spatial, NHD, and climate tooling used throughout this workflow.
library(sf)
library(tidyverse)
library(terra)
library(nhdplusTools)
library(mapview)
library(dataRetrieval)
library(lubridate)
library(prism)
library(ggspatial)
library(nngeo)  # Added from OG code
library(stars)  # Added from OG code

# Source every helper function in src/ (getNHDcomid, getWatersheds, etc.).
# A plain lapply() replaces the previous plyr::ldply() + try() hack, which
# always errored because the sourced files do not return data frames.
# `pattern` is a regex, so "\\.R$" (not the glob "*.R") matches .R files.
invisible(lapply(list.files(path = "src/",
                            pattern = "\\.R$",
                            full.names = TRUE),
                 source))
# Console output previously produced by the plyr::ldply() sourcing hack
# (safe to ignore; no longer occurs with lapply()):
#   Error in list_to_dataframe(res, attr(.data, "split_labels"), .id, id_as_factor) :
#     Results must be all atomic, or all data frames
# R Markdown chunk options: show code; hide warnings, comments, and messages.
knitr::opts_chunk$set(echo = TRUE, warning = FALSE, comment = FALSE, message = FALSE)
# mapview options: stable basemap ordering with an OpenTopoMap background.
mapviewOptions(basemaps.color.shuffle = FALSE, basemaps = "OpenTopoMap")
# For this code to run properly, your site data must be configured as
# described for the site.comid.data/ folder. Currently, this workflow
# requires downloading several data sets locally for much speedier run
# times. This includes: PRISM climate & aridity rasters, NHD flow direction
# data, and CONUS-wide NHD catchments. All data sets are found in the
# shared data folder.
# Use COMID to allow site linkages with all datasets in this workflow.
# Use planar (GEOS) geometry operations instead of spherical (s2);
# required for some of the watershed intersections later in the workflow.
sf_use_s2(FALSE)

site_type <- "comid" # This step assumes you have already checked your COMIDs
sites <- read_csv("data/WROL_comid.csv")
# Pull all metadata associated with each site's COMID.
# When sites are supplied as COMIDs, attach the full set of NHD attributes
# for each site's COMID via the project helper getNHDcomid().
if (identical(site_type, "comid")) {
  comid_lookup <- dplyr::select(sites, site, comid)
  sites <- getNHDcomid(df = comid_lookup)
}
# Expected console output: [1] "42 locations linked to the NHD."
# Make NHD-based watershed shapefiles for all CONUS sites. To make this
# step MUCH faster, it is best to have a locally downloaded version of the
# National NHD catchment shapefile stored on your local system. This
# shapefile is already included in the data folder.
# Delineate each site's watershed, then re-attach site identifiers by COMID.
# Geometry is dropped from `sites` first so the join adds attributes only.
site_ids <- sf::st_drop_geometry(sites) %>%
  select(site, comid)
site_watersheds <- getWatersheds(df = sites, make_pretty = TRUE) %>%
  inner_join(site_ids, by = "comid")
# Map all your sites. Maps are automatically stored in the data/maps/
# folder. It is highly recommended to review each map, particularly for
# known locations along BIG rivers and TINY streams. Note: COMIDs should
# have been checked before running this code.
suppressWarnings(map2(sites$site, sites$comid, getMaps))
# Interactive map showing all sites and their delineated watersheds:
# Build each layer separately, then overlay: watersheds + site points +
# the flowlines written earlier to data/site_flowlines.gpkg.
flowlines <- st_read('data/site_flowlines.gpkg', quiet = TRUE)
watershed_layer <- mapview(site_watersheds, col.regions = "#56B4E9",
                           alpha.regions = 0.2, lwd = 3,
                           layer.name = "Watershed")
point_layer <- mapview(sites, cex = 5, col.regions = "black",
                       layer.name = "Points")
flowline_layer <- mapview(flowlines, lwd = 3, color = "red",
                          layer.name = "Flowline")
watershed_layer + point_layer + flowline_layer
# This getStreamCat() function is adapted from Simon Topp's LakeCat
# extraction. StreamCat is huge (~600 possible variables). And while EPA
# has since made an API that interacts with StreamCat (which would make
# this code 1 billion times faster), it wasn't public when this code was
# written. So! We made a function that downloads and links the requested
# StreamCat data sets.
#
# To download the data that you want, be sure to update the
# epa_categories vector argument in getStreamCat() with the names of the
# categorized data sets you are interested in downloading. A list of those
# data sets can be found on the EPA StreamCat website (the original
# hyperlink was lost in conversion).
#
# Change save = TRUE to save = FALSE if you do not want to keep all
# CONUS-level StreamCat data that you downloaded.
# Download the selected StreamCat categories and join them onto `sites`.
sites <- getStreamCat(
  sites = sites,
  # Choose EPA categories to download here. These example categories took
  # a few minutes to download.
  epa_categories = c(
    "AgMidHiSlopes", "CoalMines", "CanalDensity", "ImperviousSurfaces",
    "NLCD2019", "Dams", "FirePerimeters", "Kffact", "Elevation", "NADP",
    "RefStreamTempPred", "RoadDensity", "RoadStreamCrossings", "Runoff",
    "STATSGO_Set1", "STATSGO_Set2", "USCensus2010", "WWTP",
    "GeoChemPhys3", "Lithology"
  ),
  # save = FALSE discards the CONUS-level downloads after linking.
  save = FALSE
)
# Lastly, we pull in additional data that is not found in StreamCat. This
# includes PRISM climate data, aridity data, and Omernik Ecoregion data for
# each site's coordinates (i.e., at the site location, NOT aggregated
# across its watershed). We also calculate mean aridity and the dominant
# Ecoregion across site watersheds. Our net primary production layer is no
# longer available from NASA, so this data set has been excluded from this
# data pull. I plan to update this workflow to include it again once it
# becomes available.
# Sequentially enrich `sites` with non-StreamCat covariates. Each helper is
# a project function (sourced from src/) that takes the current `sites`
# data frame and returns it with new columns appended; `sf` receives the
# watershed polygons for watershed-scale summaries. Run in order — later
# steps assume the columns added by earlier ones are present.

# Extract the mean aridity index within each site's watershed as well as each site's location
sites <- getAridity(df = sites, sf = site_watersheds)
# Extract Omernik ecoregion for each site's location
sites <- getOmernikSite(df = sites)
# Extract dominant Omernik ecoregion within each site's watershed
sites <- getOmernikWs(df = sites, sf = site_watersheds)
# Extract PRISM ppt, tmean, tmax, and tmin data for each site's location
sites <- getPRISM(df = sites)
# Extract mean chemistry values within each site's watershed as well as each site's location
sites <- getChemistry(df = sites, sf = site_watersheds)
# Link to original NPP data set (currently excluded from this workflow):
# https://lpdaac.usgs.gov/products/mod17a3hgfv006/ "Terra MODIS Net Primary Production Yearly L4 Global 500 m SIN Grid products are currently unavailable due to unexpected errors in the input data. Please note that a newer version of MODIS land products is available and plans are being developed for the retirement of Version 6 MODIS data products. Users are advised to transition to the improved Version 6.1 products as soon as possible."
# Export data with only the columns of interest.
# Optional short export (kept for reference): subset to the columns of
# interest before writing. Note: a stray double comma after "streamorde"
# in the original list has been removed.
# data_final <- sites %>%
#   dplyr::select("site", "comid", "gnis_name", "lengthkm", "shape_length",
#                 "streamleve", "streamorde", "areasqkm", "totdasqkm",
#                 "divdasqkm", "maxelevraw", "minelevraw", "maxelevsmo",
#                 "minelevsmo", "slope", "elevfixed", "hwtype", "slopelenkm",
#                 "qa_ma", "WsPctFull.x", "DamDensWs", "DamNIDStorWs",
#                 "DamNrmStorWs", "WsPctFull.y", "PctImp2006Ws", "PctImp2008Ws",
#                 "PctImp2011Ws", "PctImp2001Ws", "PctImp2013Ws", "PctImp2019Ws",
#                 "PctImp2016Ws", "PctImp2004Ws", "AriditySite", "AridityWs",
#                 "OmernikIIISite", "OmernikIISite", "OmernikISite",
#                 "OmernikIIIWs", "OmernikIIWs", "OmernikIWs", "TmeanSite",
#                 "TmaxSite", "TminSite", "PrecipSite", "AluminumSite",
#                 "CalciumSite", "CopperSite", "IronSite", "ManganeseSite",
#                 "PhosphSite", "AluminumWs", "CalciumWs", "CopperWs", "IronWs",
#                 "ManganeseWs", "PhosphWs")
# write.csv(data_final, paste0("Geospatial_short_", Sys.Date(), ".csv"),
#           row.names = FALSE)

# Write the full enriched site table, date-stamped, to the working directory.
write.csv(sites,
          paste0("Geospatial_all_WROL_", Sys.Date(), ".csv"),
          row.names = FALSE)